* ---- Session setup ----
set more off
clear all
capture log close

* Project directories (each path keeps its trailing slash)
global data      "/Users/ceweber/Dropbox/JPAM_TIV_code/original_data/"
global dta       "/Users/ceweber/Dropbox/JPAM_TIV_code/dta_files/"
global dataclean "/Users/ceweber/Dropbox/JPAM_TIV_code/dta_output/"
global output    "/Users/ceweber/Dropbox/JPAM_TIV_code/output/"

* Start in the raw-data directory
cd "${data}"

* ---- Monthly input: transaction counts per location x month x price ----
use dispensing.dta

cd "${dataclean}"

* Keep usable-marijuana rows only (inventorytype 28; prerolls ignored for now)
* and discard deleted or refunded transactions
keep if inventorytype == 28
drop if deleted == 1 | refunded == 1

* Discard implausible prices (one cent or less, 500 or more)
drop if price <= 0.01 | price >= 500

* Calendar variables derived from the session timestamp
gen date=dofC(sessiontime)
format date %d
gen monthly=mofd(date)
gen weekly=wofd(date)

* Optional restriction to a handful of months (disabled)
*keep if monthly >= 663 & monthly <= 668

* Count transactions within each location-month-price cell
gen one = 1
collapse (count) num_trans = one, by(location monthly price)
sort location monthly price

* One identifier per location-month; used as the panel variable in Mata
egen panel = group(location monthly)
order location monthly panel price num_trans, first

* Attach the tax locality and then the quarterly rates.
* The lookup files key on locid, so rename for the duration of the merges.
rename location locid
cd "$dta"
merge m:1 locid using locations-to-tax-locality, keep(match master) nogenerate

gen quarter = qofd(dofm(monthly))
merge m:1 locality_code quarter using allrates, keep(match master) nogenerate
cd "$dataclean"
rename locid location

saveold input_for_round_price_calculations.dta, replace

* Reload from disk with a clean session
clear all
use input_for_round_price_calculations, clear

mata

	// PercentRound(): transaction-weighted share of prices that, after being
	// multiplied by `ratio`, fall within 0.01 of a multiple of `level`.
	// data is a two-column matrix laid out as [price, num_trans]
	scalar PercentRound(data, ratio, level) {
		// prices implied by the candidate ratio
		scaled = ratio :* data[., 1]

		// 1 where the scaled price is at most 0.01 from the nearest multiple of level
		is_round = abs(scaled - round(scaled, level)) :<= 0.01

		// weight each price by its transaction count and normalise
		return(sum(is_round :* data[., 2]) / sum(data[., 2]))
	}
end

mata	
	// FindBestRatios(): scan candidate price multipliers 1.000, 1.001, ..., 1.499
	// and report the two that make the largest share of prices "round".
	//
	// data  : two-column matrix [price, num_trans]
	// level : rounding grid passed through to PercentRound() (e.g. 0.25 or 1.0)
	//
	// Returns the row vector
	//   (best_ratio, best_amount, second_best_ratio, second_best_amount)
	// The runner-up is only recorded when it is more than 0.002 away from the
	// current best ratio, so that it is not merely a neighbour of the best.
	function FindBestRatios(data, level) {
		// ratio 1.0 (prices as recorded) is the starting incumbent
		best_amount = PercentRound(data, 1.0, level)
		best_ratio = 1.0
		
		second_best_amount = 0.0
		second_best_ratio = 0.0
		
		// Use an integer counter and derive the ratio from it. Accumulating
		// ratio = ratio + 0.001 in floating point drifts away from the intended
		// three-decimal grid and can skip the final candidate (1.499) when the
		// accumulated value ends up slightly above the loop bound.
		for (k = 1; k <= 499; k++) {
			ratio = 1 + k / 1000
			test_amount = PercentRound(data, ratio, level)
			
			if (test_amount > best_amount) {
				// demote the previous best to runner-up, but only if it is far
				// enough from the ratio that is replacing it
				if (abs(ratio - best_ratio) > 0.002) {
					second_best_amount = best_amount
					second_best_ratio = best_ratio
				}
				// either way, the tested ratio becomes the new best
				best_amount = test_amount
				best_ratio = ratio
			} else if (test_amount > second_best_amount) {
				// new runner-up only if it is far enough from the best ratio
				if (abs(ratio - best_ratio) > 0.002) {
					second_best_amount = test_amount
					second_best_ratio = ratio
				}
			}
		}
		
		out = (best_ratio, best_amount, second_best_ratio, second_best_amount)
		
		return(out)
	}

end


//keep if location == 241
//keep if monthly == 667

// Copy the cell-level data into Mata, sorted so each panel's rows are contiguous
sort location monthly panel price
mata
	all_data = st_data(., "panel price num_trans monthly combined_rate")
end

// Reduce the Stata dataset to one row per location-month
keep location monthly
bysort location monthly: keep if _n == 1

// Empty result columns (filled from Mata afterwards)
foreach lvl in quarters whole {
	foreach stat in best_ratio best_amount second_best_ratio second_best_amount round_zero {
		gen `stat'_`lvl' = .
	}
}

gen round_modal_firm_quarters = .
gen round_modal_firm_whole = .

mata
	// all_data is sorted by its first column (panel id); record each panel's row range
	info = panelsetup(all_data, 1)
	num_panels = rows(info)
	
	// result vectors, one entry per panel, all starting out missing
	blank = J(num_panels,1,.)
	
	best_ratio_quarters = blank
	best_amount_quarters = blank
	second_best_ratio_quarters = blank
	second_best_amount_quarters = blank
	round_zero_quarters = blank
	
	best_ratio_whole = blank
	best_amount_whole = blank
	second_best_ratio_whole = blank
	second_best_amount_whole = blank
	round_zero_whole = blank
	
	round_modal_firm_quarters = blank
	round_modal_firm_whole = blank
	
	// evaluate every panel in turn
	for (i = 1; i <= num_panels; i++) {
		printf("Working on panel %f out of %f...\t",i, num_panels)
		panelsubview(panel_data = ., all_data, i, info)
		
		// columns 2-3 are [price, num_trans], the layout the helper functions take
		pq = panel_data[., 2..3]
		
		// quarter-dollar grid
		out = FindBestRatios(pq, 0.25)
		best_ratio_quarters[i] = out[1]
		best_amount_quarters[i] = out[2]
		second_best_ratio_quarters[i] = out[3]
		second_best_amount_quarters[i] = out[4]
		round_zero_quarters[i] = PercentRound(pq, 1.0, 0.25)
		
		// whole-dollar grid
		out_whole = FindBestRatios(pq, 1.0)
		best_ratio_whole[i] = out_whole[1]
		best_amount_whole[i] = out_whole[2]
		second_best_ratio_whole[i] = out_whole[3]
		second_best_amount_whole[i] = out_whole[4]
		round_zero_whole[i] = PercentRound(pq, 1.0, 1.0)
		
		// column 4 is the month, column 5 the combined rate (constant within a panel);
		// .37 is added from month 666 onwards
		monthly = panel_data[1,4]
		modal_rate = panel_data[1,5] + 1.00 + .37 * (monthly >= 666)
		
		round_modal_firm_quarters[i] = PercentRound(pq, modal_rate, 0.25)
		round_modal_firm_whole[i] = PercentRound(pq, modal_rate, 1.0)
		
		printf("Got optimal_ratio = %f, round_amount = %f\n",best_ratio_quarters[i], best_amount_quarters[i])
	}
	
	// write the results into the Stata variables created before this block
	st_store(., tokens("best_ratio_quarters best_amount_quarters second_best_ratio_quarters second_best_amount_quarters round_zero_quarters"), (best_ratio_quarters, best_amount_quarters, second_best_ratio_quarters, second_best_amount_quarters, round_zero_quarters))
	
	st_store(., tokens("best_ratio_whole best_amount_whole second_best_ratio_whole second_best_amount_whole round_zero_whole"), (best_ratio_whole, best_amount_whole, second_best_ratio_whole, second_best_amount_whole, round_zero_whole))
	
	st_store(., tokens("round_modal_firm_quarters round_modal_firm_whole"), (round_modal_firm_quarters, round_modal_firm_whole))
	
end

* write the monthly results to disk
compress
saveold rounding_results, replace


*****************************
** Weekly *****
*****************************
* Same preparation as the monthly input, but cells are location x week x price.

clear
cd "$data"
use dispensing.dta
cd "$dataclean"

* restrict to usable marijuana transactions (ignores prerolls for now)
keep if inventorytype == 28
drop if deleted == 1
drop if refunded == 1

* drop insane prices
drop if price <= 0.01
drop if price >= 500

* generate calendar variables
* NOTE(review): the weekly refinement step later in this file builds its week
* key as wofd(date + 1); confirm the one-day offset relative to this key is intended.
gen date=dofC(sessiontime)
format date %d
gen monthly=mofd(date)
gen weekly = wofd(date)

* small number of months for now
*keep if monthly >= 663 & monthly <= 668

* collapse by location, week, and price
gen one = 1
collapse (count) num_trans = one, by(location weekly price)

sort location weekly price

* we're going to need a single panel identifier
egen panel = group(location weekly)

order location weekly panel price num_trans, first

* need to join in the tax rates stuff
 
rename location locid

cd "$dta"
* get the tax locality in there
merge m:1 locid using locations-to-tax-locality, keep(match master)
drop _merge

* now get the rates
* weekly is a week index, so it must be converted with dofw(); passing it to
* dofm() would read the week number as a month number and yield a quarter far
* outside the data, leaving the allrates merge with nothing to match.
gen quarter = qofd(dofw(weekly))

merge m:1 locality_code quarter using allrates, keep(match master)
drop _merge
cd "$dataclean"


rename locid location
saveold input_for_round_price_calculations_weekly.dta, replace


clear all
use input_for_round_price_calculations_weekly, clear


mata

	// PercentRound(): transaction-weighted share of prices that, after being
	// multiplied by `ratio`, fall within 0.01 of a multiple of `level`.
	// data is a two-column matrix laid out as [price, num_trans]
	// (defined again here because the preceding clear all removes Mata functions)
	scalar PercentRound(data, ratio, level) {
		// prices implied by the candidate ratio
		scaled = ratio :* data[., 1]

		// 1 where the scaled price is at most 0.01 from the nearest multiple of level
		is_round = abs(scaled - round(scaled, level)) :<= 0.01

		// weight each price by its transaction count and normalise
		return(sum(is_round :* data[., 2]) / sum(data[., 2]))
	}
end

mata	
	// FindBestRatios(): scan candidate price multipliers 1.000, 1.001, ..., 1.499
	// and report the two that make the largest share of prices "round".
	//
	// data  : two-column matrix [price, num_trans]
	// level : rounding grid passed through to PercentRound() (e.g. 0.25 or 1.0)
	//
	// Returns the row vector
	//   (best_ratio, best_amount, second_best_ratio, second_best_amount)
	// The runner-up is only recorded when it is more than 0.002 away from the
	// current best ratio, so that it is not merely a neighbour of the best.
	function FindBestRatios(data, level) {
		// ratio 1.0 (prices as recorded) is the starting incumbent
		best_amount = PercentRound(data, 1.0, level)
		best_ratio = 1.0
		
		second_best_amount = 0.0
		second_best_ratio = 0.0
		
		// Use an integer counter and derive the ratio from it. Accumulating
		// ratio = ratio + 0.001 in floating point drifts away from the intended
		// three-decimal grid and can skip the final candidate (1.499) when the
		// accumulated value ends up slightly above the loop bound.
		for (k = 1; k <= 499; k++) {
			ratio = 1 + k / 1000
			test_amount = PercentRound(data, ratio, level)
			
			if (test_amount > best_amount) {
				// demote the previous best to runner-up, but only if it is far
				// enough from the ratio that is replacing it
				if (abs(ratio - best_ratio) > 0.002) {
					second_best_amount = best_amount
					second_best_ratio = best_ratio
				}
				// either way, the tested ratio becomes the new best
				best_amount = test_amount
				best_ratio = ratio
			} else if (test_amount > second_best_amount) {
				// new runner-up only if it is far enough from the best ratio
				if (abs(ratio - best_ratio) > 0.002) {
					second_best_amount = test_amount
					second_best_ratio = ratio
				}
			}
		}
		
		out = (best_ratio, best_amount, second_best_ratio, second_best_amount)
		
		return(out)
	}

end


//keep if location == 241
//keep if monthly == 667

// Copy the cell-level data into Mata, sorted so each panel's rows are contiguous
sort location weekly panel price
mata
	all_data = st_data(., "panel price num_trans weekly combined_rate")
end

// Reduce the Stata dataset to one row per location-week
keep location weekly
bysort location weekly: keep if _n == 1

// Empty result columns (filled from Mata afterwards)
foreach lvl in quarters whole {
	foreach stat in best_ratio best_amount second_best_ratio second_best_amount round_zero {
		gen `stat'_`lvl' = .
	}
}

gen round_modal_firm_quarters = .
gen round_modal_firm_whole = .

mata
	// all_data is sorted by its first column (panel id); record each panel's row range
	info = panelsetup(all_data, 1)
	// how many panels are we dealing with?
	num_panels = rows(info)
	
	// allocate space for output, one entry per location-week panel
	best_ratio_quarters = J(num_panels,1,.)
	best_amount_quarters = J(num_panels,1,.)
	second_best_ratio_quarters = J(num_panels,1,.)
	second_best_amount_quarters = J(num_panels,1,.)
	round_zero_quarters = J(num_panels,1,.)
	
	best_ratio_whole = J(num_panels,1,.)
	best_amount_whole = J(num_panels,1,.)
	second_best_ratio_whole = J(num_panels,1,.)
	second_best_amount_whole = J(num_panels,1,.)
	round_zero_whole = J(num_panels,1,.)
	
	round_modal_firm_quarters = J(num_panels,1,.)
	round_modal_firm_whole = J(num_panels,1,.)
	
	// loop through the panels
	for (i = 1; i <= num_panels; i++) {
		printf("Working on panel %f out of %f...\t",i, num_panels)
		panelsubview(panel_data = ., all_data, i, info)
		
		// columns 2-3 are [price, num_trans], the layout the helper functions take
		out = FindBestRatios(panel_data[., 2..3], 0.25)
		out_whole = FindBestRatios(panel_data[., 2..3], 1.0)
		
		best_ratio_quarters[i] = out[1]
		best_amount_quarters[i] = out[2]
		second_best_ratio_quarters[i] = out[3]
		second_best_amount_quarters[i] = out[4]
		round_zero_quarters[i] = PercentRound(panel_data[., 2..3],1.0, 0.25)
		
		best_ratio_whole[i] = out_whole[1]
		best_amount_whole[i] = out_whole[2]
		second_best_ratio_whole[i] = out_whole[3]
		second_best_amount_whole[i] = out_whole[4]
		round_zero_whole[i] = PercentRound(panel_data[., 2..3],1.0, 1.0)
		
		// Column 4 holds a WEEK index here, not a month. Comparing it directly
		// with the monthly threshold 666 is true for every week in the data, so
		// convert the week to the month containing its first day before testing.
		monthly = mofd(dofw(panel_data[1,4]))
		
		modal_rate = panel_data[1,5] + 1.00 + .37 * (monthly >= 666)
		
		round_modal_firm_quarters[i] = PercentRound(panel_data[., 2..3], modal_rate,0.25)
		round_modal_firm_whole[i] = PercentRound(panel_data[., 2..3], modal_rate,1.0)
		
		printf("Got optimal_ratio = %f, round_amount = %f\n",best_ratio_quarters[i], best_amount_quarters[i])
	}
	
	// write the results into the Stata variables created before this block
	st_store(., "best_ratio_quarters", best_ratio_quarters)
	st_store(., "best_amount_quarters", best_amount_quarters)
	st_store(., "second_best_ratio_quarters", second_best_ratio_quarters)
	st_store(., "second_best_amount_quarters", second_best_amount_quarters)
	st_store(., "round_zero_quarters", round_zero_quarters)
	
	st_store(., "best_ratio_whole", best_ratio_whole)
	st_store(., "best_amount_whole", best_amount_whole)
	st_store(., "second_best_ratio_whole", second_best_ratio_whole)
	st_store(., "second_best_amount_whole", second_best_amount_whole)
	st_store(., "round_zero_whole", round_zero_whole)
	
	st_store(., "round_modal_firm_quarters", round_modal_firm_quarters)
	st_store(., "round_modal_firm_whole", round_modal_firm_whole)
	
end

* suffix every weekly result with _w so it cannot collide with the monthly names
foreach v in best_ratio_quarters best_amount_quarters second_best_ratio_quarters second_best_amount_quarters round_zero_quarters best_ratio_whole best_amount_whole second_best_ratio_whole second_best_amount_whole round_zero_whole round_modal_firm_quarters round_modal_firm_whole {
	rename `v' `v'_w
}
	
* save the output
save rounding_results_weekly, replace

**************************
*** Now select a ratio ***
**************************
use "rounding_results.dta", clear //results from monthly rounding

* the whole-dollar results become the default (unsuffixed) series
foreach v in best_ratio second_best_ratio best_amount second_best_amount {
	rename `v'_whole `v'
}


/*add tax information */
*merge in tax rates for each store by geo
	cd "$dta"
	mmerge location using "locations-to-tax-locality.dta", umatch(locid) type(n:1) unmatched(master)  
	gen timed = dofm(monthly) 
	gen quarter = qofd(timed)
	mmerge locality_code quarter using "allrates.dta", umatch(locality_code quarter)   type(n:1) unmatched(master)
	drop _merge timed quarter state_rate locality_code
	cd "$dataclean"

*dummy for the period from month 666 onwards (monthly is in Stata month units)
	gen TaxChange = monthly>=666

*create the real rate: 1.25*(1+combined_rate) before month 666, 1+.37+combined_rate from 666 on
	sort location monthly
	gen sales_excise = cond(monthly>=666, (1+.37+combined_rate), 1.25*(1+combined_rate)) //these are the real rates

	
**********************
*Select a best ratio*
*********************

/*Choose the best ratio for pre-tax change region and post-tax change region*/

*pick the ratio to use by looking at mode of months before and after tax change
	gen tmp3=best_ratio 
	egen tmp8=mode(tmp3), by(location TaxChange) //mode of ratio before and after tax change
	gen chosen_amount=tmp8 //let the chosen_amount be these modes 
	gen missing=0 // create a flag if there is no chosen_amount value 
	replace missing=1 if chosen_amount==.

*gen second-best chosen amount (whenever not equal to first best)
	gen tmp6=second_best_ratio if tmp3!=second_best_ratio
	egen second_best_chosen=mode(tmp6), by(location TaxChange) //mode of second_best_ratio ratio before and after tax change
	gen missing2=0 // create a flag if there is no second_best_chosen value
	replace missing2=1 if second_best_chosen==.

*gen the best quarters ratio amount (we have defaulted to whole ratios)  
	gen tmp7=best_ratio_quarters
	egen chosen_amount_quarters=mode(tmp7), by(location TaxChange) 
	drop tmp*
	
*replace the whole amount with quarters if the quarters matches better
*(mean quarters roundness beats mean whole roundness by at least .15)
	sort location TaxChange
	by location TaxChange: egen best_amount_mean = mean(best_amount)
	by location TaxChange: egen best_amount_quarters_mean = mean(best_amount_quarters)	

	* Set the flag BEFORE overwriting chosen_amount. The switching condition
	* includes chosen_amount_quarters!=chosen_amount, which can no longer hold
	* once chosen_amount has been replaced, so testing it afterwards would
	* never flag any of the switched rows.
	gen quarters_flag = 0
	replace quarters_flag=1 if best_amount_quarters_mean-best_amount_mean>=.15&best_amount_mean!=.&(chosen_amount_quarters!=chosen_amount) //flag locations that are going to use the quarters ratio instead of the whole 
	replace chosen_amount=chosen_amount_quarters if quarters_flag==1
	
*compare the chosen amount to feasible multipliers. Each indicator below is 1 when
*chosen_amount is within .01 of one candidate tax multiplier and 0 otherwise.

	* no markup at all
	gen none = abs(chosen_amount-1)<.01

	* excise only: 1.25 through month 665, 1.37 from month 666
	gen excise_tax = (abs(chosen_amount-1.25)<=.01 & monthly<=665) | (abs(chosen_amount-1.37)<=.01 & monthly>=666)

	* sales tax only: combined, the .065 rate, or the local rate
	gen sales_tax = abs(chosen_amount-(1+combined_rate))<=.01
	gen sales_tax_state = abs(chosen_amount-(1+.065))<=.01
	gen sales_tax_local = abs(chosen_amount-(1+local_rate))<=.01

	* excise and sales combined: multiplicative through 665, additive from 666
	gen all = (abs(chosen_amount-1.25*(1+combined_rate))<=.01 & monthly<=665) | (abs(chosen_amount-(1+.37+combined_rate))<=.01 & monthly>=666)

	* the same two taxes combined the other way round in each period
	gen allwrong = (abs(chosen_amount-(1+.25+combined_rate))<=.01 & monthly<=665) | (abs(chosen_amount-1.37*(1+combined_rate))<=.01 & monthly>=666)

	* chosen_amount exists but matches none of the candidates above
	gen nomatch=none==0&excise_tax==0&sales_tax==0&sales_tax_state==0&sales_tax_local==0&all==0&allwrong==0&missing==0


/*manual decisions*/ 

*locations where the quarters ratio is followed instead of the whole ratio
*(778, 494 and 1121 used to be listed here as well; those overrides are now redundant)
	foreach loc in 746 308 284 612 1140 {
		replace chosen_amount=chosen_amount_quarters if location==`loc'
		replace quarters_flag=1 if location==`loc'
	}
		
*other hand-picked ratios
	* before the tax change
	replace chosen_amount=1.000 if TaxChange==0&location==322
	replace chosen_amount=1.000 if TaxChange==0&location==423
	replace chosen_amount=1.085 if TaxChange==0&location==443
	replace chosen_amount=1.000 if TaxChange==0&location==615
	replace chosen_amount=1.000 if TaxChange==0&location==670

	* after the tax change
	replace chosen_amount=1.060 if TaxChange==1&location==494
	replace chosen_amount=1.370 if TaxChange==1&location==670
	replace chosen_amount=1.455 if TaxChange==1&location==787
	replace chosen_amount=1.465 if TaxChange==1&location==1564
	replace chosen_amount=1.000 if TaxChange==1&location==1632
	replace chosen_amount=1.465 if TaxChange==1&location==1405&monthly<=686
	replace chosen_amount=1.471 if TaxChange==1&location==1405&monthly>=687
	replace chosen_amount=1.465 if TaxChange==1&location==1473
	


**********************
*Structural breaks in the best ratio*
*********************
*The previous code assumes a constant optimal rounder for the pre-tax period and the post-tax period. We now look for changes in this optimal rounder outside of that month.
*
*Every lag and lead comparison below needs observations in month order within a
*location. The by-groups therefore name monthly explicitly as the ordering
*variable, because sorting on the group variables alone leaves the order of
*observations inside a group unspecified.


/*We now check for structural breaks in the optimal chosen_amount.*/

sort location monthly
gen best_ratio_change = 0
bysort location (monthly): replace best_ratio_change = best_ratio[_n]- best_ratio[_n-1] if quarters_flag==0 //detect changes in the best_ratio over time, within location
bysort location (monthly): replace best_ratio_change = best_ratio_quarters[_n]- best_ratio_quarters[_n-1] if quarters_flag==1 //detect changes in the best_ratio over time, within location

*TAX CHANGES
*detect changes in combined rate, allow a new mode every time there is a tax change. accept the new mode if the difference between the new mode and the old mode matches the difference between the old combined_rate and the new one
	bysort location (monthly): gen rate_change = combined_rate[_n]-combined_rate[_n-1] //detect changes in the combined_rate over time, within location
	gen tmp = 0
	replace tmp=1 if rate_change!=0 //indicator if there is a rate change (the first month of a location has a missing change and so also starts a regime)
	egen tmp2 = group (location monthly) if tmp==1 //create a new value for tmp each time there is a tax rate switch, there may be multiple switches, this gives us unique values of tmp2 to separate the mode calculations  
	sort location monthly //carryforward fills down in the current row order
	carryforward tmp2, replace //fill in all subsequent values of tmp2 for that tax rate "regime" as the same value
	bysort location TaxChange tmp2 (monthly): egen tmp3 = mode(best_ratio) // construct a mode for each "tax regime"
	bysort location TaxChange tmp2 (monthly): egen tmp31 = mode(best_ratio_quarters)
	gen tmp312 = 0
	replace tmp312 = tmp3 if quarters_flag == 0
	replace tmp312 = tmp31 if quarters_flag == 1

	*accept these new modes if the difference between them matches the difference between the combined_rates
	bysort location (monthly): gen tmp4 = tmp312[_n]-tmp312[_n-1] //the diff in the modes  
	gen rate_change_round = round(rate_change, .001) //convert to round so stata will recognize they match
	gen tmp4_round=round(tmp4, .001) //convert to round so stata will recognize they match
	
	gen tax_switch = . //gen dummy to show if the change in combined_rate is equal to the change in best_ratio and neither of these are changes==0
	replace tax_switch=1 if (rate_change_round==tmp4_round)&rate_change_round!=0&rate_change_round!=.
	
	replace chosen_amount = tmp312 if tax_switch==1 //replace chosen_amount with the new mode if the difference between them matches (throwing out cases where they both have change=0). This is only one month. 
	
	sort location TaxChange monthly //we now need to propagate that change forward
	by location TaxChange: carryforward(tax_switch) if tmp2[_n]==tmp2[_n-1], replace //let tax_switch be 1 for all subsequent months in that tax regime
	bysort location (monthly): replace chosen_amount = tmp312 if (tmp312[_n]==tmp312[_n-1] | tmp312[_n+1]==tmp312[_n])&chosen_amount[_n-1]!=chosen_amount[_n]&tax_switch==1 //make sure every chosen_amount within a regime with a tax switch matches; we are propagating the switches forwards

	bysort location (monthly): replace chosen_amount = tmp312 if rate_change_round[_n+1]==tmp4_round[_n+1]&rate_change_round[_n+1]!=0&rate_change_round[_n+1]!=. //repeat this replacement process for months right before the rate_change and mode_change match. This is because the changing might actually need to be for the months before the combined_rate change, not the months after (location 229) 

	*walk each location/TaxChange group from its latest month backwards; an explicit
	*descending key keeps that order fixed under the by-group sort
	gen long negmonthly = -monthly
	bysort location TaxChange (negmonthly): replace chosen_amount=chosen_amount[_n-1] if tmp2==tmp2[_n-1] //backwards fill the first observation before the tax change to all months that are part of the same tax regime on the same side of the TaxChange (229 problem)
	drop negmonthly
	
	*back in month order, so that _n+1 is the location's second month
	bysort location (monthly): replace chosen_amount = chosen_amount[_n+1] if best_ratio_change==.&tax_switch==1 //deal with the first observation for a location, just make sure it matches the rest of the tax regime it's in 

*OTHER KINDS OF STRUCTURAL BREAKS
*we look for structural breaks that don't coincide with a tax change
*Two acceptance rules follow: (1) the deviation from chosen_amount equals the
*combined_rate, or (2) the deviation persists for three flagged months and the
*mean roundness of the flagged months is at least .70.
	sort location TaxChange monthly
	gen structural_flag=0 //deviation from the chosen_amount for two months in a row and the best_ratios for those two months match
		bysort location TaxChange: replace structural_flag=1 if (best_ratio!=chosen_amount)&(best_ratio[_n+1]!=chosen_amount[_n+1])&(best_ratio[_n]==best_ratio[_n+1])&chosen_amount!=.&quarters_flag==0 //create a flag if at least two months in a row have a best_ratio that does not match the chosen_amount and these best_ratios match each other 
		bysort location TaxChange: replace structural_flag=1 if (best_ratio_quarters!=chosen_amount)&(best_ratio_quarters[_n+1]!=chosen_amount[_n+1])&(best_ratio_quarters[_n]==best_ratio_quarters[_n+1])&chosen_amount!=.&quarters_flag==1 //same test using the quarters ratio for locations flagged as quarters 

		bysort location TaxChange: replace structural_flag=0 if [_n]==1 //clear the flag on the first observation of each location/TaxChange group
		bysort location TaxChange: replace structural_flag=1 if best_ratio[_n-1]==best_ratio[_n]&structural_flag[_n-1]==1&quarters_flag==0 //make sure every structural_flag is 1 for that series
		bysort location TaxChange: replace structural_flag=1 if best_ratio_quarters[_n-1]==best_ratio_quarters[_n]&structural_flag[_n-1]==1&quarters_flag==1 //make sure every structural_flag is 1 for that series
		bysort location TaxChange: replace structural_flag=1 if monthly==666&structural_flag[_n+1]==1 //the flags are calculated separately on either side of the tax change, so the first month after the change (666) was cleared above; turn it back on when the following month is flagged. NOTE(review): unlike the description originally given here, this line does not itself check that the best_ratio of month 666 equals that of the next month.
	
	*allow the structural break through if the difference between the modes is the same as the combined_rate (looks like they are just adding combined rate into their recording system)
	gen diff=.
	replace diff=abs(chosen_amount-best_ratio) if structural_flag==1&quarters_flag==0 //for structural break periods, calculate the difference between the best_ratio and chosen_amount
	replace diff=abs(chosen_amount-best_ratio_quarters) if structural_flag==1&quarters_flag==1 //for structural break periods, calculate the difference between the best_ratio_quarters and chosen_amount
	gen combined_rate_round = round(combined_rate, .001) //need to round in order to compare
	gen diff_round = round(diff, .001) //need to round in order to compare
	gen match_flag = 0 
	replace match_flag=1 if combined_rate_round==diff_round //flag if the difference between the best_ratio and the chosen_amount is the same as the combined_rate. NOTE(review): diff is missing outside flagged periods, so this also matches rows where combined_rate is missing; confirm that is acceptable.
	replace chosen_amount = best_ratio if match_flag==1&quarters_flag==0 //replace chosen_amount with best_ratio if the difference between the best_ratio and the chosen_amount is the same as the combined_rate
	replace chosen_amount = best_ratio_quarters if match_flag==1&quarters_flag==1 //replace chosen_amount with best_ratio_quarters if the difference between it and the chosen_amount is the same as the combined_rate
	
	*allow the structural break through if happens for at least three months and a best_amount associated with the new mode is above 70% on average 
	gen three_month_flag=0 //flag for consistent break for at least three months
	bysort location TaxChange: replace three_month_flag = 1 if structural_flag[_n]==1&structural_flag[_n+1]==1&structural_flag[_n+2]==1&chosen_amount!=. //structural flag must be turned on three months in a row and chosen_amount is not a .
	bysort location TaxChange: replace three_month_flag = 1 if three_month_flag[_n-1]==1&structural_flag==1 //make sure we have flagged every entry in that series of best_ratio matches
	
		*best_amount_whole
		*the mean is taken over all rows of a location sharing the same three_month_flag value
		sort location three_month_flag monthly
		by location three_month_flag: egen three_month_mean = mean(best_amount) //get a mean for roundness amount for that series of best_ratio matches
		replace chosen_amount = best_ratio if three_month_flag==1&three_month_mean>=.70 //allow chosen_amount to be switched if the roundness is over 70%
	
		*best_amount_quarters
		*NOTE(review): this replace is not conditioned on quarters_flag, so it overrides the whole-ratio replace just above whenever the quarters mean is also at least .70
		by location three_month_flag: egen three_month_quarters_mean = mean(best_amount_quarters) //get a mean for roundness amount for that series of best_ratio_quarter matches
		replace chosen_amount = best_ratio_quarters if three_month_flag==1&three_month_quarters_mean>=.70 //allow chosen_amount to be switched if the roundness is over 70%
		
		

/*manual decisions*/	
*Diagnostic flag: which remaining breaks have a new best_ratio that repeats in the following row?
*NOTE(review): the by-groups below name only location, so the neighbouring rows
*compared here follow whatever order the data are in at this point.
	gen structural_flag2=0 //deviation from the chosen_amount for two rows in a row and the best_ratios for those two rows match
	bysort location: replace structural_flag2=1 if (best_ratio!=chosen_amount)&(best_ratio_quarters!=chosen_amount)&(best_ratio[_n+1]!=chosen_amount[_n+1])&(best_ratio_quarters[_n+1]!=chosen_amount[_n+1])&(best_ratio[_n]==best_ratio[_n+1])&chosen_amount!=.&quarters_flag==0
	bysort location: replace structural_flag2=0 if [_n]==1 //clear the flag on the first row of each location
	bysort location: replace structural_flag2=1 if best_ratio[_n-1]==best_ratio[_n]&structural_flag2[_n-1]==1 //extend the flag along a run of identical best_ratio values
	
	*decisions made by KSM KH CW
	replace chosen_amount=1.000 if location==1761&inrange(monthly,657,659)
	replace chosen_amount=1.078 if location==1761&monthly>=660
	replace chosen_amount=1.085 if location==787&monthly<=665
	
	*KH 11/13/17
	replace chosen_amount=1.455 if location==787&inrange(monthly,666,683)
	replace chosen_amount=1.458 if location==787&monthly>=684
	
	* additional manual decisions made by KSM and KH on 2017-08-14, based on which locations still had a missing chosen_amount at the end of the whole procedure
	replace chosen_amount = 1 if location == 399 & monthly <= 662
	replace chosen_amount = 1.07 if location == 399 & monthly >662 & monthly < 666
	replace chosen_amount = 1.456 if location == 1482 & monthly > 665
	replace chosen_amount = 1.025 if location == 1877 & monthly > 665
	replace chosen_amount = 1.466 if location == 1898 & monthly > 665
	
	*manual decision 10/28/18 KH
	replace chosen_amount=1 if location==307&monthly<=660
	replace chosen_amount=1.08 if location==307&inrange(monthly,661,665)

/*
*check for potential structural breaks at the end of the data
gen lastmonthflag=0
	replace lastmonthflag=1 if best_ratio!=chosen_amount&monthly==678
	tab location if lastmonthflag==1&chosen_amount!=.

gen secondlastmonthflag=0
	replace secondlastmonthflag=1 if best_ratio!=chosen_amount&monthly==677
	tab location if secondlastmonthflag==1&chosen_amount!=.
	*/

* store the monthly choices in location/month order
sort location monthly
save "round_amounts_monthly.dta", replace

**********************
*Refine to weekly level*
*********************	
/*
Some structural changes occur at the weekly level, not monthly. In order to allow
our code to make changes at this level, we bring in weekly data. 
*/

/*Bring in other data */
* Build a location-by-day skeleton from the raw dispensing data and attach
* (1) the monthly rounding amounts and (2) the weekly best ratios.
* Weeks and months don't match exactly, but each day has a unique month and a
* unique week, so the day is the common unit.
cd "$data"
	use "dispensing.dta", clear
cd "$dataclean"

	gen date=dofC(sessiontime)
	format date %d
	gen monthly=mofd(date)
	* NOTE(review): the week index here is wofd(date + 1), while the other
	* sections of this file use wofd(date). Left unchanged because it must agree
	* with how rounding_results_weekly.dta was built -- confirm.
	gen weekly = wofd(date + 1)

	* one row per location-day; transaction-level columns are not needed
	bysort location date: keep if _n == 1
	drop id orgid sessiontime transactionid itemnumber inventoryid inventorytype weight usableweight price deleted refunded transactionid_original

	* --- monthly rounding amounts ---------------------------------------
	mmerge location monthly using "round_amounts_monthly.dta", type(n:1) unmatched(both) umatch(location monthly)
	tab _merge
	* Test _merge directly (1 = master only). The previous version used the
	* dummies from -tab _merge, gen(merge)-, which are numbered by the
	* categories that happen to be present: if there were no master-only rows,
	* merge1 would have been the dummy for a different _merge value and the
	* flag would silently have been wrong.
	gen notinbestamountsmonthly = 1 if _merge==1

	//drop if best_ratio==. 

	* --- weekly best ratios (best_ratio_whole_w, best_ratio_quarters_w) ---
	mmerge location weekly using "rounding_results_weekly.dta", type(n:1) unmatched(both) umatch(location weekly) ukeep(best_ratio_quarters_w best_ratio_whole_w)
	save "round_amounts_week.dta", replace


	use "round_amounts_week.dta", clear
	tab _merge
	* _merge: 1 = only in the daily/monthly master, 2 = only in the weekly file
	gen notinbestamountsweekly = 1 if _merge==1
	gen onlyinbestamountsweekly = 1 if _merge==2
	//drop if monthly==. 


/*Identify where the changes in the weekly best_ratio are and apply forward at the weekly level. This occurs within the two-month window identified above.*/
* Overview of this section (data in memory: one row per location-day):
*   1. flag rows where chosen_amount differs from the previous row (break)
*   2. give each break a window id (break_months) covering the month of the
*      break and the month before it
*   3. inside each window, find the last week in which a weekly best ratio
*      changes and matches the window's final chosen_amount (switch)
*   4. rewrite chosen_amount inside the window so it changes at that week
* carryforward is a user-written command; it fills missing values from the
* previous row in the current sort order.
*identify where breaks in the chosen_amount are 
	sort location monthly weekly 
	gen break = 0 //create flag to identify where there are breaks in chosen_amount
		bysort location: replace break = 1 if chosen_amount[_n]!=chosen_amount[_n-1] //turn flag to 1 if chosen_amount differs from the previous row within a location
		bysort location: replace break = 0 if [_n]==1 //the first row of a location is never a break
		bysort location: replace break = 0 if monthly==666 // we are choosing to make all TaxChange switches occur at the first of the month. This is consistent with the data. WE COULD TURN THIS OFF AND CHECK?

*create a unique identifier for each two month period where we are looking for the weekly structural break 
	egen break_months = group (location monthly) if break==1 //create a unique identifier for each break (set only on the break rows at this point)
		bysort location: carryforward(break_months) if monthly==monthly[_n-1], replace //carryforward this unique value for every entry in the second of the two months (the month that the chosen_amount change is currently happening at)
		gsort location -monthly weekly //reverse the month order so that carryforward fills backwards in time
		bysort location: carryforward(break_months) if monthly==(monthly[_n-1] - 1), replace //carryforward this unique value to the month before the chosen_amount change
		bysort location: carryforward(break_months) if monthly==monthly[_n-1], replace //carryforward this unique value to every entry in this first month (the month before the chosen_amount change)

*within that two month period, flag the last time the weekly best_ratio becomes the second month's best ratio 
	sort location break_months monthly weekly
	by location break_months: gen switch_quarters = best_ratio_quarters_w[_n]-best_ratio_quarters_w[_n-1] if break_months!=. //row-to-row change in the weekly best ratio within the window (QUARTERS); missing on the first row of each group
	by location break_months: gen switch_whole = best_ratio_whole_w[_n]-best_ratio_whole_w[_n-1] if break_months!=. //row-to-row change in the weekly best ratio within the window (WHOLE); missing on the first row of each group

	* NOTE(review): the condition below names best_ratio_whole and
	* best_ratio_quarters (no _w suffix), while the comments talk about the
	* weekly ratios. Depending on which variables exist in memory these names may
	* resolve to the monthly variables or, by abbreviation, to the _w ones --
	* confirm which is intended.
	* NOTE(review): switch_quarters/switch_whole are missing on the first row of
	* a window, and missing!=0 is true in Stata, so that first row counts as a
	* "switch" whenever the ratio condition also holds -- confirm this is wanted.
	gen rate_switch_match=0 //generate when the weekly best_ratio switches move us to the ending chosen_amount 
	by location break_months: replace rate_switch_match=1 if (switch_quarters!=0 | switch_whole !=0 )&(best_ratio_whole==chosen_amount[_N] | best_ratio_quarters==chosen_amount[_N])&break_months!=. 
	by location break_months: egen switch = max(weekly) if rate_switch_match==1 //pull the last time the best_ratio switches to the chosen_amount that ends this period, this gives us the week to switch at

	* shoot = number of flagged rows per window (diagnostic; missing outside windows)
	egen shoot = sum(rate_switch_match), by(break_months) 
	replace shoot = . if break_months==.
	
*now that we have the week, we need to apply this as the proper chosen_amount switch time. With the monthly data above we've already applied the proper ratio outside of this period. As a result, we only need to worry about it being applied correctly within these 2 month periods we've already identified.  
	by location break_months: carryforward(switch), replace //carryforward this weekly amount to later rows in this break_month period
	* NOTE(review): the sort below is ascending (missing values sort last), not a
	* reverse sort, and the carryforward that follows runs by location only, so a
	* filled switch value can also flow into rows of the same location that come
	* later in this order -- confirm this "carrybackward" does what is intended.
	sort location break_months switch //we need to "carrybackward" so we re-sort and then carryforward 
	by location: carryforward(switch), replace //"carrybackward"

	sort location TaxChange break_months monthly weekly 
	by location TaxChange break_months : replace chosen_amount=chosen_amount[_N] if (weekly>=switch)&(switch[_n-1]==switch[_n]) //replace with the final chosen_amount in that period if the weekly is greater than or equal to the switch week
	by location TaxChange break_months : replace chosen_amount=chosen_amount[1] if (weekly<switch) // replace with the beginning chosen_amount if weekly is less than the switch point 


/*Manual*/
* Hand-coded weekly switch points. For each location the first line sets the
* amount used BEFORE the switch week and the second line the amount used FROM
* the switch week onwards, within the given TaxChange regime.
* Some of these are commented out because they previously were manual changes
* and we are now smart enough to catch them programmatically.
*
* Two corrections relative to the earlier version of this list:
*   - location 964: both lines used the condition weekly<2909, so the first
*     (1.45) was always overwritten and weeks >=2909 were never set. The second
*     line now applies from week 2909 onwards, matching every other pair here.
*     NOTE(review): direction inferred from the pattern of the other pairs --
*     KSM/KH please confirm 1.45 before / 1.456 from week 2909.
*   - location 571: the two conditions overlapped at week 2876 (<2877 and
*     >=2876); the later line won, so week 2876 already received 1.22. The
*     first condition is now <2876, which gives the same result without overlap.

	//replace chosen_amount=1.086 if location==243&weekly>=2862&TaxChange==0
	//replace chosen_amount=1.083 if location==243&weekly<2862&TaxChange==0

	//replace chosen_amount=1.087 if location==460&weekly>=2868&TaxChange==0
	//replace chosen_amount=1 if location==460&weekly<2868&TaxChange==0 

	//replace chosen_amount=1.37 if location==498&weekly<2905&TaxChange==1
	//replace chosen_amount=1.465 if location==498&weekly>=2905&TaxChange==1 

	replace chosen_amount=1.37 if location==510&weekly<2897&TaxChange==1 
	//replace chosen_amount=1.485 if location==510&weekly>=2897&TaxChange==1 

	//replace chosen_amount=1.078 if location==514&weekly<2876&TaxChange==0
	//replace chosen_amount=1.079 if location==514&weekly>=2876&TaxChange==0

	replace chosen_amount=1.11 if location==571&weekly<2876&TaxChange==0
	replace chosen_amount=1.22 if location==571&weekly>=2876&TaxChange==0 

	replace chosen_amount=1 if location==685&weekly<2929&TaxChange==1
	//replace chosen_amount=1.466 if location==685&weekly>=2929&TaxChange==1 

	replace chosen_amount=1.37 if location==705&weekly<2894&TaxChange==1 
	//replace chosen_amount=1.456 if location==705&weekly>=2894&TaxChange==1 

	replace chosen_amount=1.457 if location==793&weekly<2894&TaxChange==1
	//replace chosen_amount=1.449 if location==793&weekly>=2894&TaxChange==1 

	replace chosen_amount=1.338 if location==891&weekly<2893&TaxChange==1 
	//replace chosen_amount=1.465 if location==891&weekly>=2893&TaxChange==1 

	replace chosen_amount=1.45 if location==964&weekly<2909&TaxChange==1 
	replace chosen_amount=1.456 if location==964&weekly>=2909&TaxChange==1
 
	replace chosen_amount=1 if location==265&weekly<2908&TaxChange==1
	//replace chosen_amount=1.451 if location==265&weekly>=2908&TaxChange==1 

	replace chosen_amount=1.095 if location==449&weekly<2882&TaxChange==0
	replace chosen_amount=1.096 if location==449&weekly>=2882&TaxChange==0

	//replace chosen_amount=1.095 if location==528&weekly<2873&TaxChange==0 
	replace chosen_amount=1.096 if location==528&weekly>=2873&TaxChange==0

	replace chosen_amount=1.37 if location==543&weekly<2899&TaxChange==1
	//replace chosen_amount=1.454 if location==543&weekly>=2899&TaxChange==1 

	//replace chosen_amount=1 if location==556&weekly<2873&TaxChange==0 
	replace chosen_amount=1.081 if location==556&weekly>=2873&TaxChange==0

	//replace chosen_amount=1.37 if location==564&weekly<2912&TaxChange==1 
	//replace chosen_amount=1.465 if location==564&weekly>=2912&TaxChange==1 

	replace chosen_amount=1.465 if location==741&weekly<2930&TaxChange==1
	//replace chosen_amount=1.468 if location==741&weekly>=2930&TaxChange==1 

	replace chosen_amount=1.37 if location==1148&weekly<2912&TaxChange==1
	//replace chosen_amount=1.466 if location==1148&weekly>=2912&TaxChange==1




/* Verify that every row has a rounding amount, then patch the gaps */
gen flag3 = 1 if chosen_amount == .
	// THIS IS DIFFERENT NOW THAT IM NOT DROPPING monthly==. and best_ratio==.
	tab location if flag3 == 1
	tab location if flag3 == 1 & notinbestamountsmonthly != 1 & onlyinbestamountsweekly != 1
	* every row without a chosen_amount comes from a date that is not present
	* in every input dataset
	tab notinbestamountsmonthly if flag3 == 1
	tab onlyinbestamountsweekly if flag3 == 1

	// rows without a date are not in dispensing: drop them
	drop if date == .

	* What remains is mostly dates found in neither the weekly nor the monthly
	* best-amount files (98 observations when this was written: 92 in neither
	* file, 6 only in the weekly one; "keep if flag3==1" to inspect them).
	* They inherit the nearest chosen_amount of the same location.

	* pass 1: fill from the most recent earlier date ("patchy data")
	bysort location (date): carryforward chosen_amount, gen(chosen_amount2)
	replace chosen_amount = chosen_amount2 if chosen_amount == .

	* pass 2: fill gaps at the very start of a location from the next later date
	gsort location -date
	bysort location: carryforward chosen_amount, gen(chosen_amount3)
	replace chosen_amount = chosen_amount3 if chosen_amount == .

	** whatever is still missing belongs to locations that exist in the data
	** for <= 4 days; these are deliberately left alone
	replace flag3 = . if chosen_amount != .
	egen maxflag3 = max(flag3), by(location)
	tab location if maxflag3 == 1

/* Save it all! */
keep location date monthly weekly chosen_amount combined_rate best_* quarters_flag second_best_*
saveold round_amounts_prelim.dta, replace

*-----------------------------------------------------------------------
* Pin down the exact DAY of each chosen_amount switch.
* This part only builds the +/- 7 day windows around every switch and
* stores them in midway.dta.
*-----------------------------------------------------------------------
use round_amounts_prelim.dta, clear

* a switch is a change in chosen_amount relative to the previous day of the
* same location; the first and last row of a location never count
bysort location (date): gen rounderswitch = 1 if chosen_amount != chosen_amount[_n-1] & _n > 1 & _n < _N

tab rounderswitch
bysort location: egen sum = sum(rounderswitch)
hist sum, freq
summarize sum

** keep the rows within 7 rows before/after each switch
gen rounderperiod = rounderswitch
forvalues k = 1/7 {
	replace rounderperiod = 1 if rounderswitch[_n-`k'] == 1 | rounderswitch[_n+`k'] == 1
}
keep if rounderperiod == 1

** number each run of consecutive calendar days within a location
sort location date
gen run = _n
bysort location: replace run = run[_n-1] if date == date[_n-1] + 1
egen breakperiod = group(run)

** date3 = (mean) switch date of the run; drop days more than a week away
bysort breakperiod: gen date2 = date if rounderswitch == 1
bysort breakperiod: egen date3 = mean(date2)
keep if date >= date3 - 7 & date <= date3 + 7

** next step: count, per day, how many inventory prices change; the day with
** the most changes becomes the switch point

	save midway.dta, replace
	
* Count, for every location and day, how many inventory items show a price
* different from their previous selling day. Result: collapsed.dta with one
* row per location-date and the variable switch (number of changes).
cd "$data"
use dispensing.dta, clear
cd "$dataclean"
	keep if inventorytype==28
	* NOTE(review): unlike the other sections, deleted/refunded transactions and
	* extreme prices are not removed here -- confirm that this is intended.

	gen date=dofC(sessiontime)

	* daily price of an inventory item: the modal transaction price, or the mean
	* when egen's mode() returns missing (e.g. several modes)
	bysort location inventoryid date: egen pricemode = mode(price)
	bysort location inventoryid date: egen pricemean = mean(price)

	gen price_final = pricemode
	replace price_final = pricemean if price_final==.

	drop price
	rename price_final price
	collapse (mean) price, by(inventoryid location date)

	* NOTE(review): on the first day of an inventory item price[_n-1] is
	* missing, so that day is also counted as a switch -- confirm this is wanted.
	bysort location inventoryid (date): gen switch = 1 if price[_n]!=price[_n-1]

	collapse (sum) switch, by(location date)
	* replace added: without it the do-file stops with "file already exists"
	* on every run after the first
	save collapsed.dta, replace
	
* Move each chosen_amount switch to the day, inside its +/- 7 day window
* (breakperiod), on which the most inventory prices changed. Output:
* rounderedits.dta with the corrected amount (chosen_amount2) per location-date.
use midway.dta, clear
mmerge date location using collapsed, unmatched(master)

* here==1 marks the day(s) with the largest number of price changes in the window
bysort breakperiod: egen max = max(switch)
gen here = 1 if switch==max
bysort breakperiod: egen heres = sum(here)
bysort breakperiod: gen flag =1 if _n==1
tab heres if flag==1

		*keep as is if the switch we have matches the switch we just found
		gen fine = 1 if rounderswitch==1&here==1
		bysort breakperiod: egen totallyfine=sum(fine)

		drop if totallyfine==1

		*deal with the others
			// when several days tie for the maximum, take the first one.
			// This is done per breakperiod: the earlier version grouped by
			// location, which kept a candidate date for only ONE window per
			// location; every other window ended up with a missing date5 and
			// was then entirely overwritten by the "date<date5" rule below.
			gen date4 = date if here==1
			bysort breakperiod here (date): replace date4 = . if _n!=1
		bysort breakperiod:  egen date5 = mean(date4)

		* diagnostics: is the new switch day after or before the old one?
		gen after = 1 if date5>date3
		gen before = 1 if date5<date3
		bysort breakperiod: egen afterbreak = mean(after)
		bysort breakperiod: egen beforebreak = mean(before)

		*fix them
		* Take the window's first and last amount in DATE order before changing
		* anything. The earlier version read chosen_amount[1]/[_N] after a plain
		* -bysort breakperiod:-, which leaves the order of days inside a window
		* undefined.
		bysort breakperiod (date): gen double amount_start = chosen_amount[1]
		bysort breakperiod (date): gen double amount_end = chosen_amount[_N]
		replace chosen_amount = amount_end if date>=date5
		replace chosen_amount = amount_start if date<date5

**now fix the rounders
		rename chosen_amount chosen_amount2
		keep chosen_amount2 location date
		save rounderedits.dta, replace
	
* Apply the day-level corrections to the preliminary amounts, make the panel
* complete (every date between a location's first and last day) and fill the
* added days with the previous day's amount. Output: roundness1.dta.
use round_amounts_prelim.dta, clear
mmerge location date using rounderedits.dta, unmatched(master)

replace chosen_amount = chosen_amount2 if chosen_amount!=chosen_amount2&chosen_amount2!=.
drop _merge
	bysort location: egen firstdate = min(date)
	bysort location: egen lastdate = max(date)

fillin location date
	*these new obs dont have first and last date obs so i'm filling them in
	bysort location: egen meanfirstdate = mean(firstdate)
	bysort location: egen meanlastdate = mean(lastdate)
	replace firstdate = meanfirstdate
	replace lastdate = meanlastdate
drop if date>lastdate
drop if date<firstdate
tab _fillin

	* order by date explicitly so the fill never depends on the sort left
	* behind by the previous commands
	bysort location (date): carryforward chosen_amount, gen(chosen_amount3)
	* flag only rows that really received a value. The earlier condition
	* "chosen_amount==.&chosen_amount3" used chosen_amount3 as a logical, and
	* missing counts as true in Stata, so rows where nothing could be carried
	* forward were flagged as well.
	gen thiswascarriedforward = 1 if chosen_amount==. & chosen_amount3!=.
	replace chosen_amount = chosen_amount3 if chosen_amount==.

	mdesc chosen_amount
	tab location if chosen_amount==. //those same short lived firms
keep location date chosen_amount thiswascarriedforward best_amount_quarters best_amount quarters_flag second_best_*
gen weekly=wofd(date)
save roundness1.dta, replace

**** Roundness: for every location-week, the share of transactions whose price
**** times chosen_amount lies within 0.01 of a whole number.
**** Outputs: round2.dta (location-week shares), roundness.dta (daily file plus
**** the weekly share) and round_amounts.dta (final daily rounding amounts).
cd "$data"
use dispensing.dta, clear
cd "$dataclean"


		* restrict to usable marijuana transactions (ignores prerolls for now)
		keep if inventorytype == 28
		drop if deleted == 1
		drop if refunded == 1

		* drop insane prices
		drop if price <= 0.01
		drop if price >= 500

		* generate date variables
		gen date=dofC(sessiontime)
		format date %d
		gen monthly=mofd(date)
		gen weekly=wofd(date)

		* Collapse by location, DAY and price, then attach the amount of that
		* day. The earlier version collapsed to location-week-price and merged
		* on (location weekly) against the daily file roundness1.dta; with
		* several rows per key on both sides that is a many-to-many merge,
		* which pairs rows by position, so in a week where chosen_amount
		* changes the amounts were attached to prices arbitrarily.
		gen one = 1
		collapse (count) num_trans = one, by(location date weekly price)

		mmerge location date using roundness1.dta, type(n:1) ukeep(chosen_amount)
		* days that exist only in roundness1 carry no week yet; give them one
		* so that weeks without any sale still appear (with a missing share)
		replace weekly = wofd(date) if weekly==.

		gen price2 = price*chosen_amount

		*figure out how round these prices are
		// round them and then check
		gen round_price = round(price2)
		gen check = abs(price2 - round_price)
		* a missing check (no price or no amount) counts as not round
		gen round = 1 if check<=.01
		replace round = 0 if round==.

			*total number of transactions
				//this is num_trans
			*total number of round transactions
				gen count_round = round*num_trans

			collapse (sum) count_round num_trans, by(location weekly)
			gen percent_round = count_round/num_trans

			keep percent_round location weekly
			save round2.dta, replace

use roundness1.dta, clear
mmerge location weekly using round2.dta, type(n:1)
save roundness.dta, replace
keep location date chosen_amount thiswascarriedforward
save round_amounts.dta, replace

